## Keep only the publication id and the country column, dropping rows whose
## country field is an empty string.
## (The original kept a commented-out variant that read from
## df_dimensions_sample instead of df_dimensions.)
df_paises <- df_dimensions %>%
  dplyr::select(id, research_org_country_names) %>%
  dplyr::filter(research_org_country_names != "")
paises <- df_paises$research_org_country_names

## Split each row on ";" so that a row listing several countries becomes a
## character vector with one element per country. The result is a list with
## one character vector per row of df_paises.
paises_split <- stringr::str_split(paises, ";")

## Unique country names across every row (no repetition).
## The previous version applied head(x, 30) to each row before flattening,
## which silently discarded any country listed after the 30th position of a
## row; flattening the whole list keeps every country.
unique_values <- unique(unlist(paises_split))
altmetrics.score - 248763 de 443511 linhas em branco (NA, NULL)
clinical_trial_ids - 430207 de 443511 linhas em branco (NA, NULL ou "")
times_cited - 281 de 443511 linhas em branco (NA, NULL)
| Name | df_dimensions$altmetrics…. |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| data | 248763 | 0.44 | 62.57 | 449.78 | 1 | 2 | 5 | 17 | 35478 | ▇▁▁▁▁ |
## Warning: Ignoring 248763 observations
| Name | df_dimensions$clinical_tr… |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 0 | 4907 | 430207 | 9763 | 0 |
## Warning: Ignoring 281 observations
## [1] ""
## 10% 20% 30% 40% 50% 60% 70% 80% 90% 95% 100%
## 0 0 0 0 0 0 1 2 6 15 19868
Ano - 0 de 443511 linhas em branco (NA, NULL)
País - 136992 de 443511 linhas em branco (NA, NULL ou ""). Nesse caso, verifica-se se existem na linha um ou mais países, ou se a linha está vazia.
Tipo de publicação - 0 de 443511 linhas em branco (NA, NULL). Podem ser do tipo: article, book, chapter, monograph, preprint ou proceeding.
| Name | df_dimensions$type |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 4 | 10 | 0 | 6 | 0 |
## Carregando pacotes exigidos: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Carregando pacotes exigidos: sp
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: /usr/share/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /usr/share/proj
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
Funders - 381381 de 443511 linhas em branco (NA, NULL ou "")
| Name | df_dimensions$funder_orgs |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 0 | 377 | 381381 | 17638 | 0 |
| Name | df_dimensions$type |
| Number of rows | 443511 |
| Number of columns | 1 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| data | 0 | 1 | 4 | 10 | 0 | 6 | 0 |
## [1] "Temos 0 linhas em branco, na coluna de afiliações dos autores"